* Setup: clear memory, close any open log, define the project directories and
* make the cleaned-data directory the working directory.
clear all 
capture log close

* Project directories (absolute, machine-specific paths; each ends in "/").
* In the visible code: $data is the directory locations.dta and hours.dta are
* read from, $dta is where inventory-strain-clean-map.dta is read from, and
* $dataclean is where intermediate and final .dta files are read and saved.
* $output is defined here but not referenced in the visible code.
global data "/Users/ceweber/Dropbox/JPAM_TIV_code/original_data/"
global dta "/Users/ceweber/Dropbox/JPAM_TIV_code/dta_files/"
global dataclean "/Users/ceweber/Dropbox/JPAM_TIV_code/dta_output/"
global output "/Users/ceweber/Dropbox/JPAM_TIV_code/output/"

* do not pause output with --more--
set more off
cd "$dataclean"

***CONSTRUCT SOME VARIABLES FOR DATA CLEANING:
use collapsed_retail_wholesale_daily_clean_part1.dta, clear 
xtset salesloc date

/*generate variables from this dataset*/
	
*generate info on when location was operating 
	*dates of sales
	* tmp = date on rows with a non-missing salesloc and a strictly positive, non-missing weight sold
	gen tmp=date if salesloc<.&usableweight>0&usableweight<.
		egen firstsaledate=min(tmp), by(salesloc) //first date of sale for a location
		egen lastsaledate=max(tmp), by(salesloc) //last date of sale for a location
		
* snapshot with tmp, firstsaledate and lastsaledate only (later variables are not in this file)
save firstlast.dta, replace
		
	* peel sale dates off both ends of tmp one at a time to get the 2nd/3rd sale dates
	* and the next-to-last / second-to-last sale dates
	replace tmp=. if date==firstsaledate 
		egen secondsaledate=min(tmp), by(salesloc)
		
	replace tmp=. if date==lastsaledate
		egen nextlastsaledate=max(tmp), by(salesloc)
		
	replace tmp=. if date==secondsaledate
		egen thirdsaledate=min(tmp), by(salesloc)
		
	replace tmp=. if date==nextlastsaledate
		egen nextnextlastsaledate=max(tmp), by(salesloc)

	*months of operation
	* Flag rows that come more than 30 (15) days after the previous row OF THE SAME LOCATION.
	* The comparison is done within salesloc so that the first row of a location is not
	* compared with the last date of the preceding location; on the first row of each
	* location date[_n-1] is missing, so the flag is 0.
	sort salesloc date
	by salesloc (date): gen monthclose=date>date[_n-1]+30
	gen tmpmonthclosedate=date if monthclose==1
	by salesloc (date): gen twoweekclose=date>date[_n-1]+15
	gen tmptwoweekclosedate=date if twoweekclose==1


	
*clean up the matrix *
* Rectangularize the panel so that every salesloc-date pair has a row.
fillin salesloc date
	* nosales==1 marks rows created by fillin, i.e. days with no recorded sales
	rename _fillin nosales

	* filled-in rows carry zero activity
	foreach v in items usableweight sale {
		replace `v'=0 if nosales==1
	}

	* copy the location-level sale dates onto the filled-in rows
	foreach v in firstsaledate nextlastsaledate nextnextlastsaledate lastsaledate secondsaledate {
		egen tmpx=max(`v'), by(salesloc)
		replace `v'=tmpx if nosales==1
		drop tmpx
	}
	

	
/*Merge in other data we care about*/

* Period identifiers: some of the merged information is at a lower frequency than daily.
generate monthly = mofd(date)
generate weekly = wofd(date)

* Both lookup files are read from the original-data directory.
cd "$data"

*merge in location data (zip code, address, state, etc)
mmerge salesloc using "locations.dta", type(n:1) unmatched(master) umatch(id)

* copy the location's zip onto filled-in rows, then keep a numeric copy (zipnum),
* its string form (zip9) and the first five characters of that string (zip)
egen tmpx = max(zip), by(salesloc)
replace zip = tmpx if nosales==1
drop tmpx

rename zip zipnum
generate zip9 = string(zipnum, "%12.0g")
generate zip = substr(zip9, 1, 5)

*merge in hours variable:
mmerge salesloc date using  "hours.dta", type(n:1) unmatched(master) umatch(location date)

*code up holidays where we see firms often sell for just one hour:
* hol    = 1 on a coded holiday, 0 otherwise
* holnum = holiday code (1 = New Year's Day, 53 = Thanksgiving, 66 = Christmas), 0 otherwise
*
* The previous version created 81 placeholder indicators (hol1-hol81), filled only
* hol1, hol53 and hol66, and then dropped hol1-hol66, which left hol67-hol81 (all zero)
* in every dataset saved afterwards. hol and holnum are now built directly.
* Dates are the same daily values as before, written with mdy() for readability:
*   20089 20454 20820        = 1 Jan 2015/2016/2017
*   20054 20418 20782 21146  = 27 Nov 2014, 26 Nov 2015, 24 Nov 2016, 23 Nov 2017
*   20082 20447 20813 21178  = 25 Dec 2014/2015/2016/2017
gen hol=0
gen holnum=0
replace holnum=1 if inlist(date, mdy(1,1,2015), mdy(1,1,2016), mdy(1,1,2017)) //New Years Day
replace holnum=53 if inlist(date, mdy(11,27,2014), mdy(11,26,2015), mdy(11,24,2016), mdy(11,23,2017)) //thanksgiving
replace holnum=66 if inlist(date, mdy(12,25,2014), mdy(12,25,2015), mdy(12,25,2016), mdy(12,25,2017)) //christmas
replace hol=1 if holnum>0

* on coded holidays, replace reported hours with the previous day's hours for the same location
sort salesloc date
replace tothours=L.tothours if hol==1

*drop firms not in business:
	sort salesloc date
	* in business from the first sale date through the last sale date (both inclusive);
	* rows whose first sale date is missing are never flagged as in business
	gen inBusiness=(date-firstsaledate>=0)&!missing(date-firstsaledate)
	replace inBusiness=0 if (date-lastsaledate>0)&!missing(date-lastsaledate)
	keep if inBusiness!=0
	
*we do not keep firms where we observe sales for less than 60 days (32 days in our data after we drop the first 14 and the last 14):
	* countsales = number of remaining rows per location with positive, non-missing weight sold
	egen countsales=count(cond(usableweight>0&usableweight<., usableweight, .)), by(salesloc)
	tabulate countsales
	drop if countsales<60

	
*drop where sell once or twice and then closed for more than a week (and do equivalent rule based on last sale date)
* Candidate sale dates: rows with strictly positive, NON-MISSING weight. The explicit
* usableweight<. guard matters because a missing value satisfies usableweight>0 in Stata;
* it matches the definition of a sale day used for firstsaledate and countsales.
gen tmpd=date if usableweight>0&usableweight<.
* Up to 10 passes: remove the current first sale date from the candidates, find the next
* sale date, and move firstsaledate forward when the gap between them is 7 days or more.
forvalues i=1(1)10 {
	replace tmpd=. if firstsaledate==date
	egen tmpdmin=min(tmpd), by(salesloc)
	replace firstsaledate=tmpdmin if tmpdmin-firstsaledate>=7&tmpdmin-firstsaledate<.
	drop tmpdmin
}
drop tmpd

* Mirror image for the end of the series: move lastsaledate back when the preceding
* sale date is 7 days or more earlier.
gen tmpd=date if usableweight>0&usableweight<.&date<=lastsaledate
forvalues i=1(1)10 {
	replace tmpd=. if lastsaledate==date
	egen tmpdmax=max(tmpd), by(salesloc)
	replace lastsaledate=tmpdmax if lastsaledate-tmpdmax>=7&lastsaledate-tmpdmax<.
	drop tmpdmax
}
drop tmpd

* drop rows outside the adjusted first/last sale window
replace inBusiness=0 if date<firstsaledate
replace inBusiness=0 if date>lastsaledate
drop if inBusiness==0



*drop firms before their last five day close at beginning of data, after at the end of the data, and count as new firm in the middle:
* Purpose: flag runs of (near-)zero sales and switch inBusiness off around them.
* Rows are only flagged here (inBusiness=0); nothing is dropped in this section.
gen fivedayclose=0
* a day is flagged when it and each of the four preceding days have usableweight<=1
* (a missing lag fails the <=1 test, so the first four days of a series cannot trigger this)
replace fivedayclose=1 if usableweight<=1&L.usableweight<=1&L2.usableweight<=1&L3.usableweight<=1&L4.usableweight<=1
* also flag a day when any of the next four days is flagged
replace fivedayclose=1 if (F.fivedayclose==1|F2.fivedayclose==1|F3.fivedayclose==1|F4.fivedayclose==1 )
* earliest and latest flagged date per location
gen tmph=date if fivedayclose==1
egen minfivedayclose=min(tmph), by(salesloc)
egen maxfivedayclose=max(tmph), by(salesloc)

*egen nextminfivedayclose=min(tmph), by(salesloc)

gen extendedclose=0
replace extendedclose=1 if fivedayclose==1
*then if open for a few days sandwiched between two fivedaycloses, keep extendedclose flag on:
* (a flagged day within the next three days AND a flagged day within the previous three days)
replace extendedclose=1 if (F.fivedayclose==1|F2.fivedayclose==1|F3.fivedayclose==1)&(L.fivedayclose==1|L2.fivedayclose==1|L3.fivedayclose==1)
*then if a five day close is near the beginning or end of the time series (which often reflects other haphazard open/close decisions), flag everything before (near opening) or after (near closing) the five day close:
* "near" = the first flagged date is less than 60 days after firstsaledate, or the last flagged
* date is less than 60 days before lastsaledate; when a location has no flagged dates the
* differences are missing and neither condition holds
replace extendedclose=1 if minfivedayclose-firstsaledate<60&date<minfivedayclose
replace extendedclose=1 if lastsaledate-maxfivedayclose<60&date>maxfivedayclose
replace inBusiness=0 if extendedclose==1
drop tmph

*make figures and drop rules based on one hour per day reporting firms:
* Purpose: build tothours1, a 0/1 flag for periods in which a location appears to report
* exactly one hour open per day. The statement order matters: later replaces override earlier ones.
* Changes from the previous version (neither changes results for valid data):
*   - removed a stray "&date&" term in the end-of-series rule (it only tested date!=0);
*   - nextmintothours1 is written out in full instead of the abbreviation nextmintothours.

* first and last date with tothours==1 per location
gen tmphmax=date if tothours==1
egen maxtothours1=max(tmphmax), by(salesloc)
gen tmphmin=date if tothours==1
egen mintothours1=min(tmphmin), by(salesloc)
* ignore locations whose one-hour dates span 7 days or less
gen tmpmax=maxtothours1
gen tmpmin=mintothours1
replace maxtothours1=. if maxtothours1-mintothours1<=7
replace tmphmax=. if tmpmax-tmpmin<=7
replace mintothours1=. if tmpmax-tmpmin<=7
replace tmphmin=. if tmpmax-tmpmin<=7
drop tmpmax tmpmin
* up to 10 passes: move the max (min) date to the neighbouring one-hour date when that
* neighbour is at least 60 days away from the current max (min)
forvalues i=1(1)10 {
	replace tmphmax=. if maxtothours1==date
	egen tmphmax2=max(tmphmax), by(salesloc)
	replace maxtothours1=tmphmax2 if maxtothours1-tmphmax2>=60&maxtothours1-tmphmax2<.
	replace tmphmin=. if mintothours1==date
	egen tmphmin2=min(tmphmin), by(salesloc)
	replace mintothours1=tmphmin2 if mintothours1-tmphmin2>=60&mintothours1-tmphmin2<.
	drop tmphmax2 tmphmin2
}
drop tmphmax tmphmin

* mean reported hours strictly between min and max, and in the 60 days after max
gen tmpm=tothours if date>mintothours1&date<maxtothours1
egen meanprefrac=mean(tmpm), by(salesloc)
drop tmpm
gen tmpm=tothours if date>maxtothours1&date<maxtothours1+60
egen meanpostfrac=mean(tmpm), by(salesloc)
drop tmpm

*sometimes firms have two separate episodes, so create a nextmin/nextmax more than 60 days away:
* NOTE(review): the code below uses a 180-day cutoff, not 60 -- confirm which is intended.
gen tmphmax=date if tothours==1&date<=maxtothours1
gen tmphmin=date if tothours==1&date>=mintothours1
replace tmphmax=. if maxtothours1-date<=180
replace tmphmin=. if date-mintothours1<=180
egen nextmaxtothours1=max(tmphmax), by(salesloc)
egen nextmintothours1=min(tmphmin), by(salesloc)
*now two pairs: (mintothours1, nextmaxtothours1) and (nextmintothours1, maxtothours1)
gen tmpm=tothours if date>mintothours1&date<nextmaxtothours1
egen meanprefrac1=mean(tmpm), by(salesloc)
drop tmpm
gen tmpm=tothours if date>nextmaxtothours1&date<nextmaxtothours1+60
egen meanpostfrac1=mean(tmpm), by(salesloc)
drop tmpm
gen tmpm=tothours if date>nextmintothours1&date<maxtothours1
egen meanprefrac2=mean(tmpm), by(salesloc)
drop tmpm
gen tmpm=tothours if date>maxtothours1&date<maxtothours1+60
egen meanpostfrac2=mean(tmpm), by(salesloc)
drop tmpm

gen tothours1=0
replace tothours1=1 if mintothours1<maxtothours1&mintothours1!=.&meanprefrac>meanpostfrac&date>=mintothours1
replace tothours1=1 if mintothours1<maxtothours1&mintothours1!=.&meanprefrac<meanpostfrac&date<=maxtothours1
*turn off at beginning of time series until violation unless violation near the beginning of the data set:
replace tothours1=0 if mintothours1<maxtothours1&mintothours1!=.&meanprefrac<meanpostfrac&date<=mintothours1&mintothours1>firstsaledate+120
*turn off at end of time series after violation unless violation near the end of the data set:
replace tothours1=0 if mintothours1<maxtothours1&mintothours1!=.&meanprefrac>meanpostfrac&date>maxtothours1&maxtothours1<lastsaledate-120
*turn off in the middle if extended period of time in which no violations:
replace tothours1=0 if mintothours1<maxtothours1&mintothours1!=.&meanprefrac>meanpostfrac&date>mintothours1&maxtothours1-nextmaxtothours1>=240&nextmaxtothours1<.&maxtothours1<.&nextmintothours1<.
replace tothours1=1 if mintothours1<maxtothours1&mintothours1!=.&meanprefrac2>meanpostfrac2&date>=nextmintothours1&maxtothours1-nextmaxtothours1>=240&nextmaxtothours1<.&maxtothours1<.&nextmintothours1<.
replace tothours1=1 if mintothours1<maxtothours1&mintothours1!=.&meanprefrac1<meanpostfrac1&date<=nextmaxtothours1&maxtothours1-nextmaxtothours1>=240&nextmaxtothours1<.&maxtothours1<.&nextmintothours1<.
drop meanpre* meanpost* tmphmax tmphmin 
*replace inBusiness=0 if tothours1==1

* remove every remaining variable whose name starts with tmp
drop tmp*
cd "$dataclean"
save cleaned_part1.dta, replace

*create a better measure of inbusiness to merge back in:
use "cleaned_part1.dta", clear
set more off
sum inBusiness

* We make inBusiness less sensitive: it is allowed to turn on only once it has been on for
* about one month. The stricter "off for at least two weeks" rule is commented out below,
* so every inBusiness==0 row is currently treated as off.
*
* inBusiness2 starts as missing (off); it is switched on from the first spell of inBusiness==1
* that is long enough, carried forward, and then switched off wherever inBusiness is 0.
* Requires the user-written commands tsspell and carryforward.

gen inBusiness2 = . //this is the variable this section produces

gen temp = inBusiness 
tsset salesloc date
* tsspell creates _spell (spell id), _seq (position within spell) and _end (last row of spell)
tsspell temp
* The interactive Data Editor call that used to sit here has been disabled: it is a
* debugging leftover that interrupts unattended (batch/console) runs of this do-file.
*browse salesloc date inBusiness inBusiness2 temp _spell _seq _end
sort salesloc _spell
* count2 = length of the spell the row belongs to
bysort salesloc _spell: egen count2 = max(_seq)

sort salesloc date 
replace inBusiness2 = 1 if inBusiness==1&count2>30
bysort salesloc: carryforward inBusiness2, replace

replace inBusiness2=0 if inBusiness2==.

/*
//now kick out periods that are at least two weeks of off
replace inBusiness2=0 if inBusiness==0&count2>14
*/
replace inBusiness2=0 if inBusiness==0

* Now that these are removed, make sure there are no short leftover "on" spells.
* NOTE(review): spells turn on above only when longer than 30 rows (count2>30) but are
* removed here only when shorter than 30 (count2<30); a spell of exactly 30 is treated
* differently by the two rules -- confirm the intended boundary.
drop _spell _seq _end count2 
tsspell inBusiness2
sort salesloc _spell
bysort salesloc _spell: egen count2 = max(_seq)
replace inBusiness2 = 0 if inBusiness2==1&count2<30

* consecutive location index; `max' is only used by the commented-out diagnostic plots
egen group2 = group(salesloc)
sum group2 
local max = r(max)

*diagnostic plots
/*forvalues j = 1(1)`max' {
	sum salesloc if group2==`j'
	local title=r(mean)
	sort date
	twoway line inBusiness inBusiness2 date if group2==`j', yscale(range(0 1))
	
	graph export pic_`title'_`l'.png, replace
	
	}	*/

save inbusiness.dta, replace


cd "$dataclean"
use cleaned_part1.dta, clear

* Bring in the smoothed indicator (inBusiness2), which removes any "on" spell lasting
* less than about a month, and let it overwrite inBusiness.
mmerge salesloc date using inbusiness.dta, ukeep(inBusiness inBusiness2 tothours1) unmatched(master) update replace
replace inBusiness = inBusiness2
drop inBusiness2
save cleaned_retail_data.dta, replace

* Weekly location-level averages of the two daily indicators.
use "cleaned_retail_data.dta", clear
collapse (mean) inBusiness tothours1, by(salesloc weekly)
save "cleaned_retail_data_indicators_weekly.dta", replace

* Temporary copy of the inventory-to-strain map that keeps only rows with a
* non-empty strain_final; written to the cleaned-data directory.
cd "$dta"
use "inventory-strain-clean-map.dta" if strain_final!="", clear
cd "$dataclean"
save "inventory-strain-clean-map-tmp.dta", replace

***COLLAPSE DATA BY INVENTORY LOT: 
*this code came from dispensaries_collapseclean3.do
cd "$dataclean"
use matched_wholesale_retail_clean.dta, clear

*egen sale=tag(transactionid) 

* retail minus wholesale price per gram, times weight sold (not carried through the collapse)
gen profit=(pricepergram-pricepergramwh)*usableweight //if inventorytype==28
//replace profit = (totalsales - unitprice_wholesale) if inventorytype!=28

gen pricepergram_raw=price/usableweight

* weight bins: 1 = (.,1], 2 = (1,2], 3 = (2,3], 4 = (3,4], 5 = (4,8], 6 = above 8;
* missing usableweight gives a missing weightgroup
gen weightgroup=irecode(usableweight, 1, 2, 3, 4, 8)+1

*sale 
*prodorg
*now collapse at weekly-invlot-location level (sums of quantities and sales, means of prices and lot attributes):
* (previously also considered: logmeanprice pricepergram_dmean logmeantiprice tipricepergram_dmean)
collapse (sum) items usableweight usableweightwh totalsales (mean) procloc pricepergramwh pricepergram_raw daysfromwholesale potency_thctotal potency_cbd, by(weekly salesloc inventoryid parentlot weightgroup) fast

save invlotcollapseweightnew_weekly_clean.dta, replace


use invlotcollapseweightnew_weekly_clean.dta, clear /*created in dispensaries_collapseclean3.do*/

rename salesloc location

* attach the weekly location indicators; weeks with no indicator count as not in business
mmerge weekly location using "cleaned_retail_data_indicators_weekly.dta", type(n:1) umatch(weekly salesloc) ukeep(inBusiness tothours1) unmatched(master) update replace
replace inBusiness=0 if inBusiness==.

* keep only location-weeks whose weekly mean of inBusiness is at least 1
drop if inBusiness<1

* event time around week 2886 (the tax-change week used throughout this file)
gen TaxChange=weekly>=2886
gen run=weekly-2886
gen runtax=run*TaxChange

* keep locations first observed more than 7 weeks before, and last observed at least 7 weeks after, week 2886
egen firstsaledate=min(weekly), by(location)
egen lastsaledate=max(weekly), by(location)
keep if firstsaledate<2886-7&lastsaledate>=2886+7

* flip the indicator so that 1 means the one-hour flag is off
replace tothours1=1-tothours1

* location mean of the flipped indicator within 9 (8) weeks of the tax change,
* then spread onto every row of the location
foreach w in 9 8 {
	egen meanhours`w'=mean(tothours1) if abs(run)<`w', by(location)
}
foreach w in 9 8 {
	egen maxmeanhours`w'=max(meanhours`w'), by(location)
	replace meanhours`w'=maxmeanhours`w'
	drop maxmeanhours`w'
}

* keep locations whose mean is exactly 1 over the 9-week window
keep if meanhours9==1

gen weight7=usableweight>=6.9

* top processor per location by weight sold in weeks 2878-2885 (tmp is left in the data)
gen tmp=usableweight if inrange(weekly, 2878, 2885)
egen weightbyproc=total(tmp), by(location procloc)
egen maxproc=max(weightbyproc), by(location)
gen top_processor=(maxproc==weightbyproc)

* early-entry indicators for processor-location pairs and for retail locations
egen firstprocdate=min(weekly), by(procloc location)
gen earlyproc=firstprocdate<2847
gen earlyproc2=firstprocdate<2860
gen earlyretail=firstsaledate<2847
gen earlyretail2=firstsaledate<2860


cd "$dataclean"
* bring in the location-week tax multipliers and classification variables
mmerge location weekly using multiplier_maxtax_all.dta, type(n:1) unmatched(master) update replace

* Tax-inclusive price, "pn" version: posted price times the sales-tax multiplier before
* week 2886 and times the combined multiplier from week 2886 on, with the overrides below.
* The overrides previously targeted "tipricepergram", which only worked as an abbreviation
* of tipricepergram_pn; the full name is used so the statements cannot bind to another
* variable (or fail as ambiguous) if the merged file carries a similarly named column.
gen tipricepergram_pn=pricepergram_raw*taxmultsales
replace tipricepergram_pn=pricepergram_raw*taxmultboth if weekly>=2886
replace tipricepergram_pn=pricepergram_raw if weekly<2886&roundpre>.85&roundpre<.&maxtaxpre==float(1)
replace tipricepergram_pn=pricepergram_raw*1.37 if weekly>=2886&location==670 /*not super round and lots of ties, but if it's not 1.37, then it seems the pre-period should also change*/
replace tipricepergram_pn=pricepergram_raw*1.37 if weekly>=2886&location==374
replace tipricepergram_pn=pricepergram_raw*maxtaxmultfinal if weekly>=2886&location==356

* label recording which rule was applied in the pn version
gen taxmultpn="pn"
replace taxmultpn="nonetaxmultboth" if roundpre>.85&roundpre<.&maxtaxpre==float(1)
replace taxmultpn="nonetaxmultexcise" if location==670|location==356|location==374



replace taxmultleastchange="nonetaxmultboth" if taxmultleastchange=="nonetaxmultbothdiv"&(location==227|location==623|location==685|location==714)
*NOTE: 227/714 are both multipliers with transitions not long after the tax change (less than 8 weeks) -- all evidence points to them charging the sales tax on a round price and then adjusting later (and with these multipliers, will not cause a multiplier change later)
*NOTE 2: 623/685 are cases where this scheme persists for much longer (or until the end of the data). but same adjustment needed.
replace taxmultleastchange="nonetaxmultboth" if taxmultleastchange=="pn"&(location==227|location==623|location==685|location==714)
replace taxmultleastchange="pn" if taxmultleastchange=="nonetaxmultbothdiv"


* Tax-inclusive price, "lc" version, driven by the taxmultleastchange classification
gen tipricepergram_lc=pricepergram_raw*taxmultsales
replace tipricepergram_lc=pricepergram_raw*taxmultboth if weekly>=2886
replace tipricepergram_lc=pricepergram_raw if weekly<2886&taxmultleastchange=="nonetaxmultboth"
replace tipricepergram_lc=pricepergram_raw if weekly<2886&taxmultleastchange=="nonetaxmultexcise"
replace tipricepergram_lc=pricepergram_raw*1.37 if weekly>=2886&taxmultleastchange=="nonetaxmultexcise"
replace tipricepergram_lc=pricepergram_raw*taxmultboth if weekly>=2886&taxmultleastchange=="salestaxmultbothdiv"
  
gen logtiprice_pn=log(tipricepergram_pn)
gen logtiprice_lc=log(tipricepergram_lc)

* Attach the cleaned strain name to each inventory lot, drop rows without a processor
* or strain, and save the weekly analysis file. The count commands report the number
* of rows before and after the merge.
count
* mergekeep is 1 on every row that exists before the merge
gen mergekeep=1

mmerge inventoryid using "inventory-strain-clean-map-tmp.dta",   unmatched(master) type(n:1)
* NOTE(review): with unmatched(master) no rows from the using file alone should be added,
* so mergekeep is presumably never missing and this drop removes nothing -- confirm.
drop if mergekeep==.
count

* indicator for the tax-change week itself
gen week1=weekly==2886
* cell identifier: strain x location x weight bin
egen group_ls=group(strain_final location weightgroup)
 

drop if procloc==.
* NOTE(review): "strain" is either a variable brought in from the strain map or an
* abbreviation of strain_final -- confirm which one is intended.
drop if strain==""
count if location==.

save incidence_weeklyweightanal.dta, replace

 ***create a variable for manufacturer hetero:
* Output: procretailshares.dta with one row per location-processor-week and
* procretailshare = that location's share of the processor's total weight in the week.
cd "$dataclean"
set more off
use invlotcollapseweightnew_weekly_clean.dta, clear /*created in dispensaries_collapseclean3.do*/

rename salesloc location
keep if procloc!=.

* processor's weekly total, and the part of it sold through each location
egen procsize=total(usableweight), by(procloc weekly)
egen procretailsize=total(usableweight), by(procloc location weekly)
gen procretailshare=procretailsize/procsize

collapse (mean) procretailshare, by(location procloc weekly) fast

save procretailshares.dta, replace 
 
 ***PUT TOGETHER FINAL ANALYSIS DATA SET:
cd "$dataclean"
set more off
use incidence_weeklyweightanal.dta, clear

* numeric ids for parent lot and strain; first week a parent lot appears at a location
egen group_pl=group(parentlot)
egen group_s=group(strain_final)
egen firstpldate=min(weekly), by(parentlot location)


***GENERATE DIF DEPENDENT VARS:
* taxrates is the combined multiplier in both the pre and the post period.
* NOTE(review): both periods use taxmultboth -- confirm that the pre-period was not
* meant to use a different multiplier.
gen taxrates=taxmultboth if inlist(TaxChange, 0, 1)

* location-level multiplier in the week before (2885) and the week after (2887) week 2886
egen taxratepre=mean(cond(weekly==2885, taxrates, .)), by(location)
egen taxratepost=mean(cond(weekly==2887, taxrates, .)), by(location)

* log change in the multiplier, summarized over one row per location
gen taxchangepct=log(taxratepost/taxratepre)
egen tagloc=tag(location) if taxchangepct<.
summarize taxchangepct if tagloc==1

* tax-exclusive prices (levels and logs) for the pn and lc versions
foreach s in pn lc {
	gen pricepergram_`s'=tipricepergram_`s'/taxrates
	gen logprice_`s'=log(pricepergram_`s')
}

***CONCENTRATION INDEX:  
* Purpose: count the processors supplying each location (numproc) and a trimmed count
* (numproc4) that leaves out the smallest processors by pre-period weight share.
* tag marks one row per processor-location pair; tagretail marks one row per location.
egen tag=tag(procloc location) 
egen numproc=total(tag), by(location)
egen tagretail=tag(location) 
summ numproc if tagretail==1
* weight sold in the 8 weeks before week 2886: location total and processor-location total
 gen tmpx=usableweight if  weekly>=2886-8&weekly<2886
 egen totweightloc=total(tmpx), by(location)
drop tmpx
 gen tmpx=usableweight if  weekly>=2886-8&weekly<2886
 egen totweightprocloc=total(tmpx), by(location procloc)
drop tmpx
* processors with no pre-period weight at the location get a missing share
replace totweightprocloc=. if totweightprocloc==0

* frac = processor's share of the location's pre-period weight (defined on tagged rows only)
gen frac=totweightprocloc/totweightloc if tag==1
* smallest share at the location; totweightmin sums that share over the rows whose frac
* equals it (so ties are all counted)
egen minweightfrac=min(frac), by(location)
egen tmpx=total(minweightfrac) if frac==minweightfrac, by(location)
egen totweightmin=max(tmpx), by(location)
drop tmpx
* tag processor-location pairs whose share is above the smallest share
egen tagprocweight=tag(procloc location) if frac>minweightfrac&frac<.

* second pass: remove the smallest share from frac, find the next smallest, add it to the
* running total, and re-tag the pairs above it while the running total is below .25
replace frac=. if frac==minweightfrac
egen minweightfracx=min(frac), by(location)
egen tmpx=total(minweightfracx) if frac==minweightfracx, by(location)
egen totweightminx=max(tmpx), by(location)
drop tmpx
replace totweightmin=totweightmin+totweightminx 
egen tagx=tag(procloc location) if frac>minweightfracx&frac<.
replace tagprocweight=tagx if totweightmin<.25

* repeat the same step 17 more times
* NOTE(review): once frac is exhausted for a location, totweightminx is presumably 0 there
* (egen total over no rows) and the update is skipped -- confirm
forvalues i=2(1)18{
di `i'
replace frac=. if frac==minweightfracx
drop minweightfracx totweightminx tagx
egen minweightfracx=min(frac), by(location)
egen tmpx=total(minweightfracx) if frac==minweightfracx, by(location)
egen totweightminx=max(tmpx), by(location)
drop tmpx
replace totweightmin=totweightmin+totweightminx 
egen tagx=tag(procloc location) if frac>minweightfracx&frac<.
replace tagprocweight=tagx if totweightmin<.25
}

* number of processor-location pairs still tagged at the location after the trimming passes
egen numproc4=total(tagprocweight), by(location)

*the average tax change experienced by retail firms is 7% (because goes from taxing the tax to not taxing the tax)
summarize taxchangepct

* placebo event time: 52 weeks after week 2886
gen prun=weekly-(2886+52)

* attach the wholesale week of each lot (stored as weeklywh)
mmerge  inventoryid procloc location parentlot using full_wholesale_clean_forincidence.dta, type(n:1) unmatched(master) umatch( inventoryid procloc salesloc parentlot) ukeep(weekly) urename(weekly weeklywh)

* weeks since the inventory lot first appears in the retail data
egen group_i=group(inventoryid)
egen firstinvlotdate=min(weekly), by(group_i)
gen weeksincefirstsale=weekly-firstinvlotdate

* placebo indicator (on from week 2886+52, inclusive), its interaction with placebo
* event time, and an indicator for the placebo week itself
gen Placebo=weekly>=2886+52
gen prunplacebo=prun*Placebo
gen week1placebo=weekly==2886+52

drop if parentlot==""

gen logpricewh=log(pricepergramwh)

* panel unit = inventory lot x weight bin, observed weekly
egen groupx=group(inventoryid  weightgroup)
keep if weightgroup!=.
xtset groupx weekly

* drop wholesale prices below 1 per gram
* NOTE(review): rows with a missing pricepergramwh are retained by this condition -- confirm intended.
drop if pricepergramwh<1


***FIRST DIFF VARS:
* All lags (L., L2.) are within the groupx-weekly panel set above.

* Week-over-week change in the tax-inclusive price larger than .01 (pricechange) or .1
* (pricechange2): missing when the current price is observed but last week's is not,
* 0 when the current price is missing.
gen pricechange=cond(tipricepergram_pn<.&L.tipricepergram_pn==., ., abs(tipricepergram_pn-L.tipricepergram_pn)>.01&tipricepergram_pn<.&L.tipricepergram_pn<.)
gen pricechange2=cond(tipricepergram_pn<.&L.tipricepergram_pn==., ., abs(tipricepergram_pn-L.tipricepergram_pn)>.1&tipricepergram_pn<.&L.tipricepergram_pn<.)

* log change in the tax-exclusive price; set to 0 unless both tax-inclusive prices are below 42
gen pricediff=cond(tipricepergram_pn<42&L.tipricepergram_pn<42, logprice_pn-L.logprice_pn, 0)
gen pricediff_lc=cond(tipricepergram_lc<42&L.tipricepergram_lc<42, logprice_lc-L.logprice_lc, 0)

* level change in the tax-exclusive price and log change in the tax-inclusive price
gen pricediff2=pricepergram_pn-L.pricepergram_pn
gen pricediffti=logtiprice_pn-L.logtiprice_pn

* first difference of the tax-change indicator
gen taxdiff=TaxChange-L.TaxChange

* shifted tax-change indicators (on from the listed week) and their first differences,
* created in the same order as before: F, F2, L, L2, L3, F3, F4
local prefixes F F2 L L2 L3 F3 F4
local weeks 2887 2888 2885 2884 2883 2889 2890
forvalues j=1/7 {
	local p : word `j' of `prefixes'
	local w : word `j' of `weeks'
	gen `p'TaxChange=weekly>=`w'
	gen `p'taxdiff=`p'TaxChange-L.`p'TaxChange
}

gen L4group=weekly<=2882
gen F5TaxChange=weekly>=2891

* 1 when both the current and the lagged tax-inclusive price are observed
gen difflength=tipricepergram_pn<.&L.tipricepergram_pn<.

* placebo counterparts
gen placebodiff=cond(Placebo<.&L.Placebo<., Placebo-L.Placebo, 0)
gen FPlacebo=weekly>=2886+52+1
gen Fplacebodiff=FPlacebo-L.FPlacebo

* weights built from current and lagged quantities
gen weight2=usableweight+L.usableweight
gen weight3=L.usableweight
gen Fweight3=L2.usableweight

* interactions with the price-change indicator
gen taxdiff_pricechange=taxdiff*pricechange
gen placebodiff_pricechange=placebodiff*pricechange

* two-week log differences
gen pricediff2wk=logprice_pn-L2.logprice_pn
gen pricediffti2wk=logtiprice_pn-L2.logtiprice_pn
  
* number of price changes per panel unit in the 8 pre-period weeks, and the share of
* observations per location that changed price in the week where taxdiff==1
egen totpricechangepre=total(pricechange) if abs(run)<=8&TaxChange==0, by(groupx)
egen meanpricechange=mean(pricechange) if taxdiff==1, by(location)

* locations where no (all) observed prices changed in that week
egen tagsalesloc=tag(location) if meanpricechange<.
count if tagsalesloc==1&meanpricechange==0
count if tagsalesloc==1&meanpricechange==1

* first week each inventory lot appears in the remaining data
egen firstsaleweek=min(weekly), by(inventoryid)

save incidence_priceanal.dta, replace

*keep a version that includes data just around the tax change for most of our analysis: 
* (within 16 weeks of the tax change or of the placebo date)
keep if abs(run)<=16|abs(prun)<=16
save incidence_priceanal_small.dta, replace


